*****************************************************************
* Purpose: Construct manual/routine/abstract indices (2 sets of them) for 2-digit and 3-digit occupations
* Input: BIBB data for 1992: ZA2565.dta
* Output: two datasets, one at the level of 2-digit occupations and one at the level of 3-digit occupations
********************************************************************* 	

* Session setup: pin the Stata version and start from an empty session
version 14.2
clear all
macro drop _all
set linesize 90
set more off
* Fixed seed for reproducibility (no random draws are visible in this file)
set seed 123456789


* Open original dataset
* The path is written with forward slashes: Stata accepts them on every
* operating system, whereas backslashes only resolve on Windows.
use "data/orig/ZA2565.dta", clear

* Occupation identifiers
* v18 is the occupation at the 2-digit level, v17 at the 3-digit level.
* Both are tabulated (most frequent category first) before being renamed.
tabulate v18, sort
tabulate v17, sort
rename (v18 v17) (occuptwo occupthree)
label variable occupthree "Three digit occupational category"

* Record counters
rename (v1 v2) (dataid id)

* Weight variable
rename v318 weight
label variable weight "Weight / Gewicht"

*CONTROL VARIABLES
*******************
* Skill measures: education and age
* educat takes three values:
*   1 = neither of the degree conditions below holds
*   2 = v205 == 1 (becomes vocdeg)
*   3 = v232 == 1 or v233 == 1 (becomes unideg)
* Later assignments win, so a respondent satisfying both degree
* conditions ends up in category 3.
quietly generate educat = 1
quietly replace  educat = 2 if v205 == 1
quietly replace  educat = 3 if v232 == 1 | v233 == 1

* One 0/1 indicator per educat category, given readable names
quietly tabulate educat, generate(dedu)
rename (dedu1 dedu2 dedu3) (nodeg vocdeg unideg)

* Birth year is v298 plus 1900; age is measured relative to 1991
quietly generate gebjahr = 1900 + v298
summarize gebjahr, detail
quietly generate age = 1991 - gebjahr

* Other controls
qui rename v313 bula
qui rename v21 firmsize
* firmsize code 9 is treated as missing
qui replace firmsize = . if firmsize == 9
qui rename v19 sector
* Sector codes 4, 5 and 6 are pooled into code 4; values strictly between
* 6 and 10 are set to missing
qui replace sector = 4 if sector == 4 | sector == 5 | sector == 6
qui replace sector =. if sector > 6 & sector < 10
qui rename v20 industry
* Hours of work: v15 divided by 10, with v15 == 999 treated as missing
qui gen hourswork = v15/10
qui replace hourswork = . if v15 == 999
qui rename v293 hhsize /*substitute for married*/

* Hours-threshold indicators (at least 25 and at least 35 hours).
* Stata treats a missing value as larger than any number, so an unguarded
* "hourswork >= 25" is true when hours are unknown. The indicators are
* therefore only defined where hourswork is observed and stay missing
* otherwise; the tabulation below shows how many cases that affects.
gen ft25 = (hourswork >= 25) if !missing(hourswork)
tab ft25, m
gen ft35 = (hourswork >= 35) if !missing(hourswork)

* Sample restriction: respondents aged 18 to 55, excluding marginally
* employed persons with at most 5 hours of work.
* Observations with missing age are dropped by the first line;
* observations with missing hourswork are kept by the second line.

keep if inrange(age, 18, 55)
keep if hourswork > 5

* Monthly wage
* Each v34 bracket code is mapped to one monthly amount. The two lists
* below are parallel: the k-th code receives the k-th amount. Codes not
* listed (including 99) leave monwage missing.
local wagecodes 1 2 3 5 7 8 9 10 11 12 13 14 16 17 18
local wagevals  300 800 1250 1750 2250 2750 3250 3750 4250 4750 5250 5750 6500 7500 15000
quietly generate monwage = .
forvalues k = 1/15 {
    local code   : word `k' of `wagecodes'
    local amount : word `k' of `wagevals'
    quietly replace monwage = `amount' if v34 == `code'
}

* Hourly wage and its natural log
* The divisor is 4.2 times hours of work (4.2 is presumably the number of
* weeks per month used for the conversion -- TODO confirm)
quietly generate hourlywage = monwage/(4.2*hourswork)
quietly generate lnhwage = ln(hourlywage)

* 15 wage intervals
* wi15 numbers the v34 bracket codes listed below consecutively from 1 to 15;
* any other v34 value leaves wi15 missing.
local wagecodes 1 2 3 5 7 8 9 10 11 12 13 14 16 17 18
generate wi15 = .
forvalues k = 1/15 {
    local code : word `k' of `wagecodes'
    replace wi15 = `k' if v34 == `code'
}

* 9 modified wage intervals
*   wi15  1-2   -> 1   (0-1000)
*   wi15  3-4   -> 2   (1000-2000)
*   wi15  5-6   -> 3   (2000-3000)
*   wi15  7-8   -> 4   (3000-4000)
*   wi15  9-10  -> 5   (4000-5000)
*   wi15 11-12  -> 6   (5000-6000)
*   wi15 13     -> 7   (6000-7000)
*   wi15 14     -> 8   (7000-8000)
*   wi15 15     -> 9   (> 8000)
* The first twelve intervals are merged in pairs, the last three shift down by 6.
generate wi9 = cond(wi15 <= 12, ceil(wi15/2), wi15 - 6) if !missing(wi15)

* "Complexity" variables
rename (v184 v185 v186 v187) (einzel wieder denken neues)

* Binary categories
* For every item three 0/1 indicators are built from its answer code:
*   <item>H = 1 for codes 1 or 2
*   <item>L = 1 for codes 4 or 5
*   <item>M = 1 for code 3
* Any other value, including missing, yields 0 on all three indicators.
foreach item in einzel wieder denken neues {
    generate `item'H = inlist(`item', 1, 2)
    generate `item'L = inlist(`item', 4, 5)
    generate `item'M = `item' == 3
}

* Routine / non-routine index: each is the average of two H indicators,
* so it takes the values 0, 0.5 or 1
generate repetitive    = (einzelH + wiederH)/2
generate nonrepetitive = (neuesH + denkenH)/2

* Main tool measure
* One 0/1 indicator per code (or pair of codes) of v167. Each indicator is 1
* when v167 equals the listed code and 0 otherwise, including when v167 is
* missing.
gen pencilh     = v167 == 26
gen phoneh      = v167 == 27
gen calculh     = v167 == 28
gen cardh       = v167 == 29
gen edv_listh   = v167 == 30
gen bookh       = v167 == 31
* NOTE(review): this indicator previously tested code 31, which made it an
* exact duplicate of bookh. Code 32 is the only value skipped in the 26-34
* run and is used as an office-tool code when schreibenR is built, so 32 is
* used here -- confirm against the ZA2565 codebook.
gen copyh       = v167 == 32
gen dicth       = v167 == 33
gen typewriterh = v167 == 34
gen cassah      = inlist(v167, 36, 46)
gen painth      = v167 == 37
gen bookingh    = v167 == 38
gen microfilmh  = v167 == 39
gen tvh         = v167 == 40
gen computerh   = inlist(v167, 41, 42)
gen CADgh       = v167 == 47
gen handcarh    = v167 == 1
gen carh        = v167 == 2
gen tractorh    = v167 == 3
gen staplerh    = v167 == 4
gen kranh       = v167 == 5
gen baggerh     = v167 == 6
gen planeh      = v167 == 7
gen hammerh     = v167 == 10
gen needleh     = v167 == 11
gen microscopeh = v167 == 12
gen thermoh     = v167 == 13
gen schweissh   = v167 == 14
gen bohrerh     = v167 == 15
gen nähh        = v167 == 16
gen webh        = v167 == 17
gen pumph       = v167 == 18
gen CNCh        = v167 == 19
gen medth       = v167 == 21
gen energyth    = v167 == 22
gen chemieth    = v167 == 23
gen hochofenh   = v167 == 24

*TASK VARIABLES
****************
* Activity indicators
* Each variable is 1 when at least one of the listed survey items equals 1
* and 0 otherwise; a missing item counts as "not 1".
quietly generate maschin  = v38 == 1 | v39 == 1
quietly generate reparier = v40 == 1 | v42 == 1
quietly generate anbauen  = v43 == 1 | v44 == 1
quietly generate erzeugen = v45 == 1
quietly generate bauen    = v46 == 1
quietly generate bewirten = v47 == 1
quietly generate reinigen = v48 == 1 | v49 == 1
quietly generate packen   = v50 == 1
quietly generate steuern  = v41 == 1
quietly generate archiv   = v51 == 1
quietly generate forschen = v52 == 1
quietly generate planen   = v53 == 1
generate schreiben = v55 == 1

* Routine (R) versus non-routine (NR) variants of three activities.
* An activity counts as routine when the respondent reports it (item == 1)
* and either both einzelH and wiederH are 1, or the main tool v167 is one
* of the codes listed for that activity. The NR variant is the reported
* activity that is not classified as routine, so R and NR never overlap.

* v55: schreiben
generate schreibenR  = v55 == 1 & ((einzelH == 1 & wiederH == 1) | inlist(v167, 29, 30, 32, 35, 39))
generate schreibenNR = v55 == 1 & !schreibenR

* v56: kalkul
generate kalkul   = v56 == 1
generate kalkulR  = v56 == 1 & ((einzelH == 1 & wiederH == 1) | inlist(v167, 28, 38))
generate kalkulNR = v56 == 1 & !kalkulR

* v54: kaufen
generate kaufen   = v54 == 1
generate kaufenR  = v54 == 1 & ((einzelH == 1 & wiederH == 1) | inlist(v167, 36, 46))
generate kaufenNR = v54 == 1 & !kaufenR

* Remaining activity indicators: 1 when a listed item equals 1, else 0
quietly generate edv      = v57 == 1
quietly generate sichern  = v58 == 1
quietly generate gesetz   = v59 == 1
quietly generate erziehen = v60 == 1
quietly generate pflegen  = v61 == 1
quietly generate publi    = v62 == 1
quietly generate anleiten = v63 == 1 | v64 == 1

* Construct AFL measure constructed equivalently to
* Antonczyk, D., Fitzenberger, B., & Leuschner, U. (2009).
* Can a Task-Based Approach Explain the Recent Changes in the German Wage Structure?
* Journal of Economics and Statistics (Jahrbuecher Fuer Nationaloekonomie Und Statistik),
* 229(2-3), 214–238. Retrieved from http://ideas.repec.org/a/jns/jbstat/v229y2009i2-3p214-238.html
* Page 8 (of the iza version)

* Activities belonging to each task type
local manual   bewirten reparier steuern sichern pflegen
local routine  maschin erzeugen bauen kalkulR schreibenR kaufenR archiv
local abstract planen gesetz forschen edv erziehen kaufenNR publi anleiten

* Denominator: number of activities reported across all three task types
egen sumtaet = rowtotal(`manual' `routine' `abstract')

* Each index is the share of reported activities that fall into the task
* type. It is missing when sumtaet is 0, because division by zero yields
* missing in Stata.
foreach t in manual routine abstract {
    egen sum`t' = rowtotal(``t'')
    generate afl`t' = sum`t'/sumtaet
    drop sum`t'
    label variable afl`t' "AFL  task index for `t' tasks"
}

* Look at distribution of tasks in the individual dataset
* Step 1: unweighted mean of each AFL index within three-digit occupation
foreach var in manual routine abstract {
	bys occupthree : egen meanafl`var' = mean(afl`var')
	}
* Step 2: largest of the three occupation means (stored as double so the
* equality tests below compare exact values)
egen double maxtask = rowmax(meanaflmanual meanaflabstract meanaflroutine)
* Step 3: categorical variable naming the task type that attains the maximum.
* In Stata a missing value equals a missing value, so without the
* !missing() guard an occupation whose three means are all missing would
* be classified as "Abstract"; such cases now stay missing.
* Ties are resolved by assignment order: abstract over routine over manual.
gen taskthreedigit = .
replace taskthreedigit = 1 if maxtask==meanaflmanual & !missing(maxtask)
replace taskthreedigit = 2 if maxtask==meanaflroutine & !missing(maxtask)
replace taskthreedigit = 3 if maxtask==meanaflabstract & !missing(maxtask)
la def tasktype 1 "Manual" 2 "Routine" 3 "Abstract"
la val taskthreedigit tasktype
la var taskthreedigit "Type of task, based on three digit occupation"
tab taskthreedigit [aw=weight]

* Keep a copy of the individual data: it is needed again for the
* three-digit aggregation further down
preserve

* Aggregate to one observation per 2-digit occupation (weighted means)
collapse repetitive nonrepetitive aflmanual aflroutine aflabstract [pw=weight], by(occuptwo)

sort occuptwo
* Label
la var repetitive "Share of people reporting that occupation is repetitive"
la var nonrepetitive "Share of people reporting that occupation is new/challenging"
la var occuptwo "Two digit occupational category"
la var aflabstract "Mean of AFL  task index for abstract tasks"
la var aflroutine "Mean of AFL  task index for routine tasks"
la var aflmanual "Mean of AFL  task index for manual tasks"

* Generate a categorical variable for each occupation, either 'manual', 'routine', or 'abstract'
* First get the max of the three.
* The maximum is stored as double: egen creates float variables by default,
* while collapse stores means in double precision, so an equality test
* against a float maximum can fail for values that float cannot represent.
egen double maxtask = rowmax(aflmanual aflabstract aflroutine)
* Then gen categorical variable.
* A missing value equals a missing value in Stata, so the !missing() guard
* keeps occupations with no valid index from being classified as "Abstract".
* Ties are resolved by assignment order: abstract over routine over manual.
gen tasktwodigit = .
replace tasktwodigit = 1 if maxtask==aflmanual & !missing(maxtask)
replace tasktwodigit = 2 if maxtask==aflroutine & !missing(maxtask)
replace tasktwodigit = 3 if maxtask==aflabstract & !missing(maxtask)
* (Re)define the value label here so that this step does not depend on
* label definitions made before the collapse
la def tasktype 1 "Manual" 2 "Routine" 3 "Abstract", replace
la val tasktwodigit tasktype
la var tasktwodigit "Type of task, based on two digit occupation"

* Save for being merged into the individual dataset:
* every variable gets the prefix bibb, except the occupation code, which
* keeps its name so that it is the same in both datasets
rename * bibb*
rename bibboccuptwo occuptwo

compress
* Forward slashes resolve on every operating system
save "data/bibb_task_2d.dta", replace


* THREE DIGIT OCCUPATIONS WHEN POSSIBLE
* We use 3-digit occupations for those occupations where the information is good
restore

* Need to calculate frequency of each occupation
bys occupthree : gen freq = _N
* We use three digit occupations unless there are fewer than 10 people
* on which this is based, which is around one percent of cases
* For those cases we will use two digit occupations
drop if freq<10
drop freq

* Aggregate to one observation per 3-digit occupation (weighted means)
collapse repetitive nonrepetitive aflabstract aflroutine aflmanual [pw=weight], by(occupthree)

sort occupthree
* Label
la var repetitive "Share of people reporting that occupation is repetitive"
la var nonrepetitive "Share of people reporting that occupation is new/challenging"
la var occupthree "Three digit occupational category"
la var aflabstract "Mean of AFL  task index for abstract tasks"
la var aflroutine "Mean of AFL  task index for routine tasks"
la var aflmanual "Mean of AFL  task index for manual tasks"

* Generate a categorical variable for each occupation, either 'manual', 'routine', or 'abstract'
* First get the max of the three.
* The maximum is stored as double: egen creates float variables by default,
* while collapse stores means in double precision, so an equality test
* against a float maximum can fail for values that float cannot represent.
egen double maxtask = rowmax(aflmanual aflabstract aflroutine)
* Then gen categorical variable.
* A missing value equals a missing value in Stata, so the !missing() guard
* keeps occupations with no valid index from being classified as "Abstract".
* Ties are resolved by assignment order: abstract over routine over manual.
gen taskthreedigit = .
replace taskthreedigit = 1 if maxtask==aflmanual & !missing(maxtask)
replace taskthreedigit = 2 if maxtask==aflroutine & !missing(maxtask)
replace taskthreedigit = 3 if maxtask==aflabstract & !missing(maxtask)
* (Re)define the value label here so that this step does not depend on
* label definitions made earlier in the session
la def tasktype 1 "Manual" 2 "Routine" 3 "Abstract", replace
la val taskthreedigit tasktype
la var taskthreedigit "Type of task, based on three digit occupation"

* Save for being merged into the individual dataset:
* every variable gets the prefix bibb, except the occupation code, which
* keeps its name so that it is the same in both datasets
rename * bibb*
rename bibboccupthree occupthree

* TODO (from the original author): the occupation variable may still need
* to be renamed to match the dataset it will be merged with
compress
* Forward slashes resolve on every operating system
save "data/bibb_task_3d.dta", replace

exit
